// generate_re2c.hpp
// Copyright (c) 2009 Ben Hanson (http://www.benhanson.net/)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef LEXERTL_GENERATE_RE2C_HPP
#define LEXERTL_GENERATE_RE2C_HPP

#include "char_traits.hpp"
#include "consts.hpp"
#include "internals.hpp"
#include <iostream>
#include "runtime_error.hpp"
#include "size_t.hpp"
#include "state_machine.hpp"
#include <vector>

namespace boost
{
namespace lexer
{
// check whether state0_0 is referenced from any of the other states
template <typename Char>
bool need_label0_0(boost::lexer::basic_state_machine<Char> const &sm_)
{
    typedef typename boost::lexer::basic_state_machine<Char>::iterator
        iterator_type;
    iterator_type iter_ = sm_.begin();
    std::size_t states_ = iter_->states;

    for (std::size_t state_ = 0; state_ < states_; ++state_)
    {
        if (0 == iter_->bol_index || 0 == iter_->eol_index)
        {
            return true;
        }

        std::size_t const transitions_ = iter_->transitions;
        for (std::size_t t_ = 0; t_ < transitions_; ++t_)
        {
            if (0 == iter_->goto_state)
            {
                return true;
            }
            ++iter_;
        }
        if (transitions_ == 0) ++iter_;
    }
    return false;
}

template<typename CharT>
void generate_re2c (const basic_state_machine<CharT> &state_machine_,
    std::ostream &os_, const bool use_pointers_ = false,
    const bool skip_unknown_ = true, const bool optimise_parameters_ = true,
    const char *name_ = "next_token")
{
    typedef typename boost::lexer::basic_string_token<CharT> string_token;
    const detail::internals &sm_ = state_machine_.data ();

    if (sm_._lookup->size () == 0)
    {
        throw runtime_error ("Cannot generate code from an empty "
            "state machine");
    }

    std::string upper_name_ (__DATE__);
    const std::size_t lookups_ = sm_._lookup->front ()->size ();
    typename boost::lexer::basic_state_machine<CharT>::iterator iter_ =
        state_machine_.begin();
    typename boost::lexer::basic_state_machine<CharT>::iterator end_ =
        state_machine_.end();
    const std::size_t dfas_ = sm_._dfa->size ();
    std::string::size_type pos_ = upper_name_.find (' ');
    const char *iterator_ = 0;

    if (use_pointers_)
    {
        if (lookups_ == 256)
        {
            iterator_ = "const char *";
        }
        else
        {
            iterator_ = "const wchar_t *";
        }
    }
    else
    {
        iterator_ = "Iterator &";
    }

    while (pos_ != std::string::npos)
    {
        upper_name_.replace (pos_, 1, "_");
        pos_ = upper_name_.find (' ', pos_);
    }

    upper_name_ += '_';
    upper_name_ +=  __TIME__;

    pos_ = upper_name_.find (':');

    while (pos_ != std::string::npos)
    {
        upper_name_.erase (pos_, 1);
        pos_ = upper_name_.find (':', pos_);
    }

    upper_name_ = '_' + upper_name_;
    upper_name_ = name_ + upper_name_;
    std::transform (upper_name_.begin (), upper_name_.end (),
        upper_name_.begin (), ::toupper);
    os_ << "#ifndef " << upper_name_ + '\n';
    os_ << "#define " << upper_name_ + '\n';
    os_ << "// Copyright (c) 2008-2009 Ben Hanson\n";
    os_ << "//\n";
    os_ << "// Distributed under the Boost Software License, "
        "Version 1.0. (See accompanying\n";
    os_ << "// file licence_1_0.txt or copy at "
        "http://www.boost.org/LICENSE_1_0.txt)\n\n";
    os_ << "// Auto-generated by boost::lexer\n";
    os_ << "template<typename Iterator>\n";
    os_ << "std::size_t " << name_  << " (";

    if (dfas_ > 1 || !optimise_parameters_)
    {
        os_ << "std::size_t &start_state_, ";
    }

    if (use_pointers_)
    {
        os_ << iterator_ << " &";
    }
    else
    {
        os_ << iterator_;
    }

    os_ << "start_token_, ";

    if (use_pointers_)
    {
        os_ << iterator_ << " const ";
    }
    else
    {
        os_ << "const " << iterator_;
    }

    os_ << "end_, \n";
    os_ << "    std::size_t &unique_id_";

    if (sm_._seen_BOL_assertion || !optimise_parameters_)
    {
        os_ << ", bool &beg_of_line_";
    }

    os_ << ")\n";
    os_ << "{\n";
    os_ << "    static const std::size_t npos = static_cast"
        "<std::size_t>(~0);\n";
    os_ << "\n    if (start_token_ == end_)\n";
    os_ << "    {\n";
    os_ << "        unique_id_ = npos;\n";
    os_ << "        return 0;\n";
    os_ << "    }\n\n";

    if (dfas_ > 1)
    {
        os_ << "again:\n";
    }

    os_ << "    Iterator curr_ = start_token_;\n";
    os_ << "    bool end_state_ = false;\n";
    os_ << "    std::size_t id_ = npos;\n";
    os_ << "    std::size_t uid_ = npos;\n";

    if (dfas_ > 1)
    {
        os_ << "    std::size_t end_start_state_ = start_state_;\n";
    }

    if (sm_._seen_BOL_assertion)
    {
        os_ << "    bool bol_ = beg_of_line_;\n";
        os_ << "    bool end_bol_ = bol_;\n";
    }

    os_ << "    Iterator end_token_ = start_token_;\n";
    os_ << '\n';

    if (dfas_ > 1)
    {
        os_ << "    switch (start_state_)\n";
        os_ << "    {\n";

        for (std::size_t i_ = 0; i_ < dfas_; ++i_)
        {
            os_ << "    case " << i_ << ":\n";
            os_ << "        goto " << i_ << "_0;\n";
            os_ << "        // Not needed, but to prevent warnings\n";
            os_ << "        break;\n";
        }

        os_ << "    default:\n";
        os_ << "        throw std::runtime_error (\"Invalid start state!\")\n";
        os_ << "        break;\n";
        os_ << "    }\n\n";
    }

    os_ << "    ";

    if (lookups_ == 256)
    {
        os_ << "char";
    }
    else
    {
        os_ << "wchar_t";
    }

    os_ << " ch_ = 0;\n\n";

    bool need_state0_0_label = need_label0_0(state_machine_);

    for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_)
    {
        const std::size_t states_ = iter_->states;

        for (std::size_t state_ = 0; state_ < states_; ++state_)
        {
            const std::size_t transitions_ = iter_->transitions;
            std::size_t t_ = 0;

            if (dfas_ > 1 || dfa_ != 0 || state_ != 0 || need_state0_0_label)
            {
                os_ << "state" << dfa_ << '_' << state_ << ":\n";
            }

            if (iter_->end_state)
            {
                os_ << "    end_state_ = true;\n";
                os_ << "    id_ = " << iter_->id << ";\n";
                os_ << "    uid_ = " << iter_->unique_id << ";\n";
                os_ << "    end_token_ = curr_;\n";

                if (dfas_ > 1)
                {
                    os_ << "    end_start_state_ = " << iter_->goto_dfa <<
                        ";\n";
                }

                if (sm_._seen_BOL_assertion)
                {
                    os_ << "    end_bol_ = bol_;\n";
                }

                if (transitions_) os_ << '\n';
            }

            if (t_ < transitions_ || iter_->bol_index != boost::lexer::npos ||
                iter_->eol_index != boost::lexer::npos)
            {
                os_ << "    if (curr_ == end_) goto end;\n\n";
                os_ << "    ch_ = *curr_;\n";

                if (iter_->bol_index != boost::lexer::npos)
                {
                    os_ << "\n    if (bol_) goto state" << dfa_ << '_' <<
                        iter_->bol_index << ";\n\n";
                }

                if (iter_->eol_index != boost::lexer::npos)
                {
                    os_ << "\n    if (ch_ == '\n') goto state" << dfa_ << '_' <<
                        iter_->eol_index << ";\n\n";
                }

                os_ << "    ++curr_;\n";
            }

            for (; t_ < transitions_; ++t_)
            {
                const char *ptr_ = iter_->token._charset.c_str();
                const char *end_ = ptr_ + iter_->token._charset.size();
                char start_char_ = 0;
                char curr_char_ = 0;
                bool range_ = false;
                bool first_char_ = true;

                os_ << "\n    if (";

                while (ptr_ != end_)
                {
                    curr_char_ = *ptr_++;

                    if (*ptr_ == curr_char_ + 1)
                    {
                        if (!range_)
                        {
                            start_char_ = curr_char_;
                        }

                        range_ = true;
                    }
                    else
                    {
                        if (!first_char_)
                        {
                            if (iter_->token._negated)
                            {
                                os_ << " && ";
                            }
                            else
                            {
                                os_ << " || ";
                            }
                        }

                        first_char_ = false;

                        if (range_)
                        {
                            typename string_token::string temp_;

                            if (iter_->token._negated)
                            {
                                os_ << "!";
                            }

                            string_token::escape_char (start_char_, temp_);
                            os_ << "(ch_ >= '" << temp_;
#if defined _MSC_VER && _MSC_VER <= 1200
                            temp_.erase ();
#else
                            temp_.clear ();
#endif
                            string_token::escape_char (curr_char_, temp_);
                            os_ << "' && ch_ <= '" << temp_ << "')";
                            range_ = false;
                        }
                        else
                        {
                            typename string_token::string temp_;

                            os_ << "ch_ ";

                            if (iter_->token._negated)
                            {
                                os_ << "!=";
                            }
                            else
                            {
                                os_ << "==";
                            }

                            string_token::escape_char (curr_char_, temp_);
                            os_ << " '" << temp_ << "'";
                        }
                    }
                }

                os_ << ") goto state" << dfa_ << '_' << iter_->goto_state <<
                    ";\n\n";
                ++iter_;
            }

            if (!(dfa_ == dfas_ - 1 && state_ == states_ - 1))
            {
                os_ << "    goto end;\n";
            }

            if (transitions_ == 0) ++iter_;
        }
    }

    os_ << "end:\n";
    os_ << "    if (end_state_)\n";
    os_ << "    {\n";
    os_ << "        // return longest match\n";

    if (dfas_ > 1)
    {
        os_ << "        start_state_ = end_start_state_;\n";
    }

    if (sm_._seen_BOL_assertion && dfas_ < 2)
    {
        os_ << "        beg_of_line_ = end_bol_;\n";
    }

    os_ << "        start_token_ = end_token_;\n";

    if (dfas_ > 1)
    {
        os_ << '\n';
        os_ << "        if (id_ == 0)\n";
        os_ << "        {\n";

        if (sm_._seen_BOL_assertion)
        {
            os_ << "            bol_ = end_bol_;\n";
        }

        os_ << "            goto again;\n";
        os_ << "        }\n";

        if (sm_._seen_BOL_assertion)
        {
            os_ << "        else\n";
            os_ << "        {\n";
            os_ << "            beg_of_line_ = end_bol_;\n";
            os_ << "        }\n";
        }
    }

    os_ << "    }\n";
    os_ << "    else\n";
    os_ << "    {\n";

    if (sm_._seen_BOL_assertion)
    {
        os_ << "        beg_of_line_ = *start_token_ == '\\n';\n";
    }

    if (skip_unknown_)
    {
        os_ << "        // No match causes char to be skipped\n";
        os_ << "        ++start_token_;\n";
    }

    os_ << "        id_ = npos;\n";
    os_ << "        uid_ = npos;\n";
    os_ << "    }\n";
    os_ << '\n';
    os_ << "    unique_id_ = uid_;\n";
    os_ << "    return id_;\n";
    os_ << "}\n";
    os_ << "\n#endif\n";
}
}
}
#endif
